In [2]:
    
%pylab inline
from tools import *
from PythonWrapper.descriptors import *
    
    
In [3]:
    
lbf_descs = np.load("../descriptors/lbp_lfwa.npy")
# reshape to (n_images, n_blocks, 256): one 256-bin LBP histogram per block
lbf_descs = lbf_descs.reshape((lbf_descs.shape[0], lbf_descs.shape[1]/256, 256))
    
In [18]:
    
variances = np.var(lbf_descs, axis=(0,1))
variances_percentage = variances / np.sum(variances)
plot(variances_percentage)
xlabel("LBP values")
ylabel("Variance distribution")
_ = xlim(xmax=255)
    
    
In [5]:
    
def isUniform(x):
    """Return True if the 8-bit LBP value x is uniform, i.e. its circular
    binary representation contains at most two 0/1 transitions."""
    uniform = True
    prev_x = x / 128  # most significant bit, so the comparison wraps around
    transitions = 0

    for i in range(8):
        is_transition = (x % 2 != prev_x % 2)  # current bit vs. previous bit
        if is_transition:
            transitions += 1
        if transitions > 2:
            uniform = False
            break
        prev_x = x
        x /= 2  # shift to the next bit

    return uniform
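
A quick sanity check, added here as a sketch (not part of the original run): isUniform should flag exactly 58 of the 256 possible 8-bit patterns as uniform, which together with a single catch-all bin for the non-uniform patterns gives the 59-dimensional uniform-LBP histograms used further down.

In [ ]:

# Sanity-check sketch: count the uniform patterns among all 256 LBP values (expected: 58).
print sum(isUniform(x) for x in range(256))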
    
In [6]:
    
sorted_variances = sorted(enumerate(variances_percentage), key=lambda x: x[1], reverse=True)
print map(lambda x: isUniform(x[0]), sorted_variances[:60])
print "Number of uniform LBP in the 58 biggest variances components: %d"%np.sum(map(lambda x: isUniform(x[0]), sorted_variances[:58]))
    
    
In [12]:
    
%config InlineBackend
    
    
In [16]:
    
cum_sum = np.cumsum(sorted(variances_percentage, reverse=True))
print len(cum_sum[cum_sum<0.98])
print len(cum_sum[cum_sum<0.9])
print cum_sum[58-1]  # cumulative variance share of the top 58 bins (58 = number of uniform patterns)
xlabel("Number of dimensions")
ylabel("Cumulative variance distribution")
plot(cum_sum)
xlim(xmax=255)
    
    
    
In [1]:
    
indexes = (cum_sum < 0.99)
# (binary pattern, LBP value, variance share, cumulative share) for each top-variance bin
[(format(int(i), '08b'), int(i), x, cum) for (i,x),cum in zip(np.asarray(sorted_variances)[indexes], cum_sum[indexes])]
    
    
In [5]:
    
import random
from sklearn.decomposition import PCA
pca = PCA()
descs = lbf_descs.reshape(lbf_descs.shape[0]*lbf_descs.shape[1], lbf_descs.shape[2])
indexes = random.sample(range(descs.shape[0]), 10000)
pca.fit(descs[indexes])
    
In [10]:
    
cumsum = np.cumsum(pca.explained_variance_ratio_)
print len(cumsum[cumsum < 0.98])
print len(cumsum[cumsum < 0.9])
print cumsum[58-1]
plot(cumsum)
xlabel("Number of dimensions")
ylabel("Cumulative variance distribution")
xlim(xmax=255)
#axhline(0.98, color="r")
#axhline(0.9, color="g")
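
In recent scikit-learn versions, PCA can also pick the number of components for a target explained-variance fraction directly; the sketch below (an assumption about usage, not part of the original run) passes a fraction to n_components:

In [ ]:

# Sketch: let PCA choose the smallest number of components that reaches 98%
# of the explained variance by passing a fraction to n_components.
pca_98 = PCA(n_components=0.98)
pca_98.fit(descs[indexes])
print pca_98.components_.shape[0]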
    
    
    
In [22]:
    
import random
from sklearn.decomposition import SparsePCA
sparse_pca = SparsePCA(alpha=0.5)
descs = lbf_descs.reshape(lbf_descs.shape[0]*lbf_descs.shape[1], lbf_descs.shape[2])
indexes = random.sample(range(descs.shape[0]), 10000)
sparse_pca.fit(descs[indexes])
    
In [23]:
    
print np.where(sparse_pca.components_[0] != 0)
    
    
In [24]:
    
non_null_components = [list(np.where(sparse_pca.components_[i] != 0)[0]) for i in range(sparse_pca.components_.shape[0])]
print non_null_components
    
    
In [25]:
    
for component in non_null_components:
    output = ""
    for i in component:
        output += "(%d, %s) "%(i, str(isUniform(i)))
    if len(output) > 0:
        print output
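
A small summary sketch (an assumption, not in the original run) of how many of the non-zero sparse loadings fall on uniform patterns:

In [ ]:

# Sketch: tally the non-zero SparsePCA loadings that sit on uniform LBP values.
flat = [i for component in non_null_components for i in component]
print "%d / %d non-zero loadings are on uniform LBP values"%(sum(isUniform(i) for i in flat), len(flat))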
    
    
In [26]:
    
var = np.var(sparse_pca.transform(descs), axis=0)
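
The variance of the projected descriptors is computed but not inspected above; a possible follow-up sketch (an assumption, not in the original run):

In [ ]:

# Sketch: rank the sparse components by the variance they carry on the full descriptor set.
order = np.argsort(var)[::-1]
print order[:10]
print var[order[:10]] / np.sum(var)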
    
In [11]:
    
ulbf_descs = np.load("descriptors/ulbp_lfwa.npy")
# uniform-LBP histograms: 59 bins = 58 uniform patterns + 1 catch-all bin for the non-uniform ones
ulbf_descs = ulbf_descs.reshape((ulbf_descs.shape[0], ulbf_descs.shape[1]/59, 59))
variances = np.var(ulbf_descs, axis=(0,1))
variances_percentage = variances / np.sum(variances)
plot(variances_percentage)
    
    
In [12]:
    
print "Number of components over 1%% of variance: %d"%len(variances_percentage[variances_percentage>0.005])
cum_sum = np.cumsum(sorted(variances_percentage, reverse=True))
print len(cum_sum[cum_sum<0.99]), len(cum_sum[cum_sum<0.9]), len(variances_percentage[variances_percentage>0.01])
plot(cum_sum)
    
    
    
In [13]:
    
pca = PCA()
descs = ulbf_descs.reshape(ulbf_descs.shape[0]*ulbf_descs.shape[1], ulbf_descs.shape[2])
indexes = random.sample(range(descs.shape[0]), 10000)
pca.fit(descs[indexes])
    
In [14]:
    
cumsum = np.cumsum(pca.explained_variance_ratio_)
print len(cumsum[cumsum < 0.98])
print len(cumsum[cumsum < 0.9])
plot(cumsum)
    
    
    
In [ ]: